This notebook contains scripts that evaluate the sensitivity of TCS compared with the cluster-based statistic.
Loading required packages
import os
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
Basic functions
def ensure_dir(file_name):
    """Create the parent directory of ``file_name`` if needed and return the path.

    Parameters
    ----------
    file_name : str
        Path of a file that is about to be written.

    Returns
    -------
    str
        ``file_name`` unchanged, so the call can be nested inside another call.
    """
    parent = os.path.dirname(file_name)
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError; in that case there is nothing to create.
    if parent:
        os.makedirs(parent, exist_ok=True)
    return file_name
def write_np(np_obj, file_path):
    """Save ``np_obj`` to ``file_path`` with ``np.save``.

    The file is opened explicitly in binary mode and the handle is passed to
    ``np.save``, so the data lands at exactly ``file_path``.
    """
    with open(file_path, 'wb') as sink:
        np.save(sink, np_obj)
def load_np(file_path):
    """Read back whatever ``np.load`` finds at ``file_path`` and return it."""
    with open(file_path, 'rb') as source:
        loaded = np.load(source)
    return loaded
Plot settings (LaTeX is used to render the text in the figures)
sns.set()
sns.set_style("darkgrid")
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{mathtools} \usepackage{sfmath}')
plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)
plt.rc('axes', labelsize=24)
plt.rc('figure', dpi=500)
The ground truth stored in notebook 2 is loaded here.
# Selected contrast for every task: task name -> cope number (a string, since
# it is only ever pasted into file names)
tasks = {
    'EMOTION': '3',     # faces - shapes
    'GAMBLING': '6',    # reward - punish
    'RELATIONAL': '4',  # rel - match
    'SOCIAL': '6',      # tom - random
    'WM': '20',         # face - avg
}

# Base directory where files are stored at
base_dir='/data/netapp01/work/sina/structural_clustering/PALM_revision_1'

# Ground-truth map of each task, read from the stored
# cohen_d_<task>_cope<number> file (nothing is recomputed here)
ground_truth_effect = {}
for task, cope in tqdm(tasks.items(), desc="Tasks loop", leave=True):
    ground_truth_path = '{}/ground_truth/cohen_d_{}_cope{}.dscalar.npy'.format(base_dir, task, cope)
    ground_truth_effect[task] = load_np(ground_truth_path)
Tasks loop: 0%| | 0/5 [00:00<?, ?it/s]
PALM results stored in notebook 1 are loaded here.
%%time
# Number of random repetitions
repetitions = 500
# Different sample sizes tested
# sample_sizes = [10, 20, 40, 80, 160, 320]
sample_sizes = [40]
# Different cluster defining thresholds
cdts = [3.3, 2.8, 2.6, 2.0, 1.6]
# Number of brainordinates in a cifti file
Nv = 91282
# Base directory where files are stored at
base_dir='/data/netapp01/work/sina/structural_clustering/PALM_revision_1'
# Store loaded results in nested python dictionaries
loaded_maps = {}
loaded_maps['uncorrected_tstat'] = {}
loaded_maps['spatial_cluster_corrected_tstat'] = {}
loaded_maps['topological_cluster_corrected_tstat'] = {}
# Only use the z=3.3, p=0.001 for the main analyses reported here
cdt = 3.3
for task in tqdm(tasks, desc="Tasks loop", leave=True):
loaded_maps['uncorrected_tstat'][task] = {}
loaded_maps['spatial_cluster_corrected_tstat'][task] = {}
loaded_maps['topological_cluster_corrected_tstat'][task] = {}
for sample_size in tqdm(sample_sizes, desc="Sample size loop", leave=False):
loaded_maps['uncorrected_tstat'][task][f'N={sample_size}'] = load_np(
f'{base_dir}/summary/uncorrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy',
)
loaded_maps['spatial_cluster_corrected_tstat'][task][f'N={sample_size}'] = load_np(
ensure_dir(f'{base_dir}/summary/spatial_cluster_corrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy'),
)
loaded_maps['topological_cluster_corrected_tstat'][task][f'N={sample_size}'] = load_np(
ensure_dir(f'{base_dir}/summary/topological_cluster_corrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy'),
)
Tasks loop: 0%| | 0/5 [00:00<?, ?it/s]
Sample size loop: 0%| | 0/1 [00:00<?, ?it/s]
Sample size loop: 0%| | 0/1 [00:00<?, ?it/s]
Sample size loop: 0%| | 0/1 [00:00<?, ?it/s]
Sample size loop: 0%| | 0/1 [00:00<?, ?it/s]
Sample size loop: 0%| | 0/1 [00:00<?, ?it/s]
CPU times: user 123 ms, sys: 9.23 s, total: 9.35 s Wall time: 57.5 s
The script below generates the results of the sensitivity analysis reported in the manuscript.
import scipy.stats as stats
from scipy.interpolate import CubicSpline
from scipy.interpolate import UnivariateSpline
from statsmodels.stats.power import TTestPower
from matplotlib.patches import Patch
%config InlineBackend.figure_format = 'retina'
plt.rc('figure', dpi=500)
analysis = TTestPower()
fig = plt.figure(figsize=(30, 12),constrained_layout=True)
gs = fig.add_gridspec(3, 5)
sample_size = 40
sample_sizes = [10, 20, 40, 80, 160, 320]
sample_colors = np.array(sns.color_palette("rainbow", len(sample_sizes)))
logp_threshold = -np.log10(0.05)
for ci, task in enumerate(tasks):
for ri, method in enumerate(['spatial', 'topological', 'difference']):
ax = fig.add_subplot(gs[ri, ci])
scatterx = ground_truth_effect[task]
si = 2
t_stats = loaded_maps['uncorrected_tstat'][task][f'N={sample_size}']
t_stats = t_stats[~np.isnan(t_stats).any(axis=1)]
if method == 'difference':
topological_cluster_logps = loaded_maps['topological_cluster_corrected_tstat'][task][f'N={sample_size}']
topological_cluster_logps = topological_cluster_logps[~np.isnan(topological_cluster_logps).any(axis=1)]
topological_positive_effects = np.multiply(np.mean((topological_cluster_logps>logp_threshold) & (t_stats>0), 0), (ground_truth_effect[task]>0))
topological_negative_effects = np.multiply(np.mean((topological_cluster_logps>logp_threshold) & (t_stats<0), 0), (ground_truth_effect[task]<0))
spatial_cluster_logps = loaded_maps['spatial_cluster_corrected_tstat'][task][f'N={sample_size}']
spatial_cluster_logps = spatial_cluster_logps[~np.isnan(spatial_cluster_logps).any(axis=1)]
spatial_positive_effects = np.multiply(np.mean((spatial_cluster_logps>logp_threshold) & (t_stats>0), 0), (ground_truth_effect[task]>0))
spatial_negative_effects = np.multiply(np.mean((spatial_cluster_logps>logp_threshold) & (t_stats<0), 0), (ground_truth_effect[task]<0))
topological_scattery = (topological_positive_effects + topological_negative_effects)
spatial_scattery = (spatial_positive_effects + spatial_negative_effects)
scattery = topological_scattery - spatial_scattery
xlim = (-1.5,1.5)
# cubic spline fit
bins = np.linspace(max(xlim[0], scatterx.min()), min(xlim[1], scatterx.max()), 31)
digitized = np.digitize(scatterx, bins)
x_means = [scatterx[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
x_centers = bins[1:-1]
y_means = [scattery[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
y_sems = [stats.sem(scattery[(digitized == i) | (digitized == i + 1)]) for i in range(1, len(bins) - 1)]
cs = CubicSpline(x_means, y_means, bc_type='natural', extrapolate=False)
cs_sem = CubicSpline(x_means, y_sems, bc_type='natural', extrapolate=False)
sample_x = np.linspace(scatterx.min(),scatterx.max(),200)
sample_y = cs(sample_x)
sample_y_sem = cs_sem(sample_x)
sns.lineplot(
x=sample_x,
y=sample_y,
style=True,
dashes=[(1,3)],
color=(0.01,0.05,0.07,1),
legend=False,
linewidth=3,
)
ax.fill_between(
sample_x,
sample_y - (sample_y_sem*1.96),
sample_y + (sample_y_sem*1.96),
color = np.append(sample_colors[si], 0.3),
)
ax.errorbar(
x_means,
y_means,
xerr=(np.array(x_means) * 0),
yerr=(np.array(y_sems)*1.96),
color=np.append(sample_colors[si]/2, 1),
fmt='.',
linewidth=0,
elinewidth=2,
ms=2,
)
ax.axhline(y=np.nanmax(sample_y), xmin=xlim[0], xmax=xlim[1], dashes=(2,2), color=(0.9,0.5,0.5,1), linewidth=3,)
if (np.nanmax(sample_y) < 0.06):
ax.text(xlim[1]-0.45, float(np.nanmax(sample_y)) - 0.005, '\\textbf{{ {:.0f}\% }}'.format(100*float(np.nanmax(sample_y))), fontsize=20)
else:
ax.text(xlim[1]-0.45, float(np.nanmax(sample_y)) - 0.01, '\\textbf{{ {:.0f}\% }}'.format(100*float(np.nanmax(sample_y))), fontsize=20)
ax.vlines(
x=-0.5,
ymin=-0,
ymax=np.nanmax(sample_y),
linestyles='dashed',
colors=[(0.9,0.5,0.5,1)],
linewidth=3,
)
ax.vlines(
x=0.5,
ymin=-0,
ymax=np.nanmax(sample_y),
linestyles='dashed',
colors=[(0.9,0.5,0.5,1)],
linewidth=3,
)
ax.text(-0.5 + 0.0001, float(cs(-0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(-0.5))), fontsize=20, color=(0.9,0.25,0.25,1))
ax.text(0.5 + 0.0001, float(cs(0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(0.5))), fontsize=20, color=(0.9,0.25,0.25,1))
ax.set_xlim(xlim)
if (np.nanmax(sample_y) < 0.06):
ax.set_ylim(-0.005, 0.055)
else:
ax.set_ylim(-0.005, 0.11)
else:
cluster_logps = loaded_maps[f'{method}_cluster_corrected_tstat'][task][f'N={sample_size}']
cluster_logps = cluster_logps[~np.isnan(cluster_logps).any(axis=1)]
positive_effects = np.multiply(np.mean((cluster_logps>logp_threshold) & (t_stats>0), 0), (ground_truth_effect[task]>0))
negative_effects = np.multiply(np.mean((cluster_logps>logp_threshold) & (t_stats<0), 0), (ground_truth_effect[task]<0))
scattery = positive_effects + negative_effects
sns.scatterplot(
x=scatterx,
y=scattery,
ax = ax,
s=10,
legend=False,
# color=(0.1,0.5,0.7,0.1),
color=np.append(sample_colors[si], 0.1),
linewidth=0,
)
effects = np.linspace(-1.5,1.5,200)
nocorr = [analysis.power(effect_size=x, nobs=sample_size, alpha=0.05) for x in effects]
boncorr = [analysis.power(effect_size=x, nobs=sample_size, alpha=0.05/Nv) for x in effects]
sns.lineplot(
x=effects,
y=nocorr,
style=True,
dashes=[(2,2)],
color=(.3,.3,.3,1),
legend=False,
)
sns.lineplot(
x=effects,
y=boncorr,
style=True,
dashes=[(2,2)],
color=(.3,.3,.3,1),
legend=False,
)
ax.vlines(
x=-0.5,
ymin=analysis.power(effect_size=-0.5, nobs=40, alpha=0.05/Nv),
ymax=analysis.power(effect_size=-0.5, nobs=40, alpha=0.05),
linestyles='dashed',
colors=[(0.9,0.5,0.5,1)],
linewidth=3,
)
ax.vlines(
x=0.5,
ymin=analysis.power(effect_size=0.5, nobs=40, alpha=0.05/Nv),
ymax=analysis.power(effect_size=0.5, nobs=40, alpha=0.05),
linestyles='dashed',
colors=[(0.9,0.5,0.5,1)],
linewidth=3,
)
# cubic spline fit
bins = np.linspace(scatterx.min(), scatterx.max(), 31)
digitized = np.digitize(scatterx, bins)
x_means = [scatterx[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
y_means = [scattery[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
y_sems = [stats.sem(scattery[(digitized == i) | (digitized == i + 1)]) for i in range(1, len(bins) - 1)]
cs = CubicSpline(x_means, y_means, bc_type='natural', extrapolate=False)
cs_sem = CubicSpline(x_means, y_sems, bc_type='natural', extrapolate=False)
sample_x = np.linspace(scatterx.min(),scatterx.max(),200)
sample_y = cs(sample_x)
sample_y_sem = cs_sem(sample_x)
sns.lineplot(
x=sample_x,
y=sample_y,
style=True,
color=np.append(sample_colors[si]/2, 1),
legend=False,
linewidth=3,
)
ax.text(-0.5 + 0.0005, float(cs(-0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(-0.5))), fontsize=20, color=(0.9,0.25,0.25,1))
ax.text(0.5 + 0.0005, float(cs(0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(0.5))), fontsize=20, color=(0.9,0.25,0.25,1))
ax.text(0. + 0.05, float(cs(0.7)), '\\boldmath{{$\mu = {:.0f}\% $}}'.format(100*float(scattery[np.abs(scatterx)>0.2].mean())), fontsize=20, va='center', ha='center')
ax.set_xlim(-1.5,1.5)
ax.set_ylim(-0.05,1.05)
xlabel = ''
if ri == 2:
xlabel = 'Effect size ($d$)'
ax.set_xlabel(xlabel, fontsize=40)
ylabel = ''
if ci == 0:
ylabel = '{}'.format(task)
ax.set_facecolor(np.array([234,234,242])/255)
ax.grid(color=(0.99,0.99,0.99,), linewidth=3)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.tick_params(axis='both', colors=(0.5,0.5,0.5), labelcolor=(0,0,0), direction='out')
plt.show()
posx and posy should be finite values posx and posy should be finite values posx and posy should be finite values posx and posy should be finite values posx and posy should be finite values posx and posy should be finite values